Examples of different types of plots and how to make them.

Built with R 3.5.0 on June 24 2018


Setting up

# load splot
library('splot')

# build a simulated data set that every example below draws from
n <- 1000
data <- data.frame(
  by = rep(0:1, each = n / 2),
  bet1 = rep_len(0:1, n),
  bet2 = sample(0:1, n, replace = TRUE)
)

# x: random noise shifted by the 0/1 variables and their products;
# the second noise term uses by as its standard deviation
data[["x"]] <- with(
  data,
  rnorm(n) + by * -.4 + by * bet1 * -.3 + by * bet2 * .3 +
    bet1 * bet2 * .9 - .8 + rnorm(n, mean = 0, sd = by)
)

# y: depends on x, the 0/1 variables, and their interactions,
# plus noise whose spread grows with x^2
data[["y"]] <- with(
  data,
  x * .2 + by * .3 + bet2 * -.6 + bet1 * bet2 * .8 +
    x * by * bet1 * -.5 + x * by * bet1 * bet2 * -.5 +
    rnorm(n, mean = 5) + rnorm(n, mean = -1, sd = .1 * x^2)
)

# a random three-class label, and an id that repeats across the two halves
data[["group"]] <- sample(c("a", "b", "c"), n, replace = TRUE)
data[["id"]] <- rep(seq_len(n / 2), times = 2)

# make the columns available by name for the calls that follow
attach(data)

Density plots

# with only one level of y, a histogram is drawn along with the density
splot(y)

# show y separately at levels of a second variable
splot(y, by = x)

# pass a matrix-like object as y to plot several variables at once
splot(cbind(x, y))

# when type is set to density, x is ignored
splot(cbind(x, y) ~ by * bet1, type = "density")


Line and bar plots

# a categorical y entered without an x is shown as a bar plot by default
splot(group)

# x is treated as categorical when it is non-numerical,
# or when it has fewer unique values than lim
splot(y ~ group)

# lines are the default, but any line plot can be drawn with bars instead
splot(y ~ group, type = "bar")

# in bar and line plots, the levels of x can be renamed and reordered
splot(
  y ~ group,
  type = "bar",
  levels = list(group = list(c("d", "e", "f"), c(2, 3, 1)))
)

# multiple variables can also be displayed with lines or bars;
# centering and/or scaling them often makes for a more direct comparison
splot(cbind(x, y) ~ bet1, mv.scale = TRUE)

# the variables can instead be placed on the x axis, which moves x to by
splot(cbind(x, y) ~ bet1, mv.scale = TRUE, mv.as.x = TRUE)


Scatter plots

# any bar or line plot can be redrawn as a scatter plot,
# which shows the actual data rather than summaries;
# xlas sets the orientation of the x-axis labels
# (they default to vertical at the moment)
splot(
  cbind(x, y) ~ bet1,
  mv.scale = TRUE, mv.as.x = TRUE, xlas = 1, type = "scatter"
)

# categorical variables can be plotted in the y position as well
splot(group ~ y)

# when y has only two levels, predicted probability can be displayed
splot(by ~ y, line = "probability")

# two continuous variables are usually more interesting to look at
splot(y ~ x)

# different types of models can be fit for the lines; either loess or spline
splot(y ~ x, line = loess)

# or the linear model can be adjusted with transformations of x
splot(y ~ x + x^2 + x^3)

# other variables can break these up as well;
# the line-adjusting terms could simply go at the end of the formula,
# but are entered as cov here to make the calls easier to read
splot(y ~ x * by, cov = x^2 + x^3)

splot(y ~ x * by * bet1, cov = x^2 + x^3)

splot(
  y ~ x * by * bet1 * bet2,
  cov = x^2 + x^3,
  levels = list(bet1 = c(2, 1))
)


Color assignment

# a numeric colorby applies a gradient
splot(y, colorby = y)

# each level of by gets its own seed color
splot(y ~ x * by, colorby = y)

# if x is categorical and there is no by (overall or within colorby),
# each class is assigned a different seed color
# NOTE(review): x here is the continuous simulated variable while colorby is
# the categorical group -- confirm which variable the condition refers to
splot(y ~ x, colorby = group)

# in bar plots, or line plots with more than one by level,
# colors come from the average of a numeric colorby,
# or from the most common class of a non-numeric colorby
splot(y ~ x, colorby = group, type = "bar")

# with many levels of by, coloring in a gradient may be preferable
splot(y ~ by * id, lim = FALSE, colorby = y)

# groupings can be added too; method='related' or shuffle=TRUE
# in colorby makes lines of the same class different shades
splot(y ~ by * id, lim = FALSE, colorby = list(bet1, method = "related"))


Brought to you by the Language Use and Social Interaction lab at Texas Tech University

Brought to you by the Language Use and Social Interaction lab at Texas Tech University